#load necessary packages
library(raster)
library(blockCV)
library(biomod2)
library(ggplot2)
library(ggthemes)
library(snowfall)
library(dismo)
library(ROCR)
library(gbm)
library(modEvA)

# ---- Species data ----
# Load the saved workspace object(s); the script expects this to provide `PA`.
load("Data/PA.RData")
# Does PA contain any missing values?
any(is.na(PA))
# It does, so drop every row that has at least one NA.
PA <- na.omit(PA)

# ---- Predictor data ----
# NOTE(review): presumably this defines the raster stack `var.stack` used
# further down - confirm against Scripts/load_var_stack.R.
source("Scripts/load_var_stack.R")

# Names of the species and of the covariates kept for this analysis.
sp.names <- c('eucapauc', 'eucaobli', 'eucapilu', 'corymacu')
cov.names <- c("bc02", "bc04", "bc05", "bc12", "bc14", "bc21", "bc32", "bc33",
               "rjja", "rsea", "rugg")

# ---- Subset the data frame to the species of interest ----
# Columns 37:72 of PA are narrowed to a positional shortlist first, and then
# to whichever of those columns are listed in sp.names.
species <- PA[, 37:72]
species <- species[, c(3, 6, 7, 9, 10, 12, 15:18, 24, 26, 28, 34)]
species <- species[, colnames(species) %in% sp.names]
nsp <- ncol(species)

# ---- Covariate table ----
# Covariate values are taken from the columns of PA rather than from the
# raster stack, because using the stack takes too long to calculate.
# Columns are kept in the order in which they occur in PA.
covariates <- PA[, colnames(PA) %in% cov.names]

# Columns 2:3 of PA first, then the covariates, then the species columns.
all.sp.data <- cbind(PA[, 2:3], covariates, species)

# The intermediate tables are no longer needed.
rm(list = c("species", "covariates"))

# Boundaries used to cut the data into spatial folds.
# NOTE(review): the values look like latitudes in degrees - confirm.
slice <- c(-38, -36, -34, -32, -30, -28)

# Get all PA points from either the southern or the northern training fold and
# explore extrapolation.
#
# dismo::mess() pairs the columns of the reference table with the layers of
# the raster stack by POSITION, not by name. The covariate columns are
# therefore selected in layer order whenever every layer name of var.stack is
# a column of all.sp.data. If the names cannot be matched, the positional
# selection 3:13 is used and a warning is raised so the order can be checked
# by hand.
mess.layer.names <- names(var.stack)
if (all(mess.layer.names %in% colnames(all.sp.data))) {
  mess.ref.cols <- mess.layer.names
} else {
  warning("Not all layer names of var.stack are columns of all.sp.data; ",
          "falling back to columns 3:13 - verify that their order matches ",
          "the layer order of var.stack.", call. = FALSE)
  mess.ref.cols <- 3:13
}

# Column 2 of all.sp.data is the coordinate compared against `slice`
# (presumably latitude - confirm against PA).
fold.coord <- all.sp.data[, 2]
in.south.fold <- fold.coord > slice[1] & fold.coord < slice[5]
in.north.fold <- fold.coord > slice[2] & fold.coord < slice[6]

env.points.south <- all.sp.data[in.south.fold, mess.ref.cols]
env.points.north <- all.sp.data[in.north.fold, mess.ref.cols]

# MESS surface of the whole stack relative to each training fold. The first
# call is timed; gc() frees memory before the second call.
system.time(
  MESS.south.training.points <- dismo::mess(var.stack, env.points.south)
)
gc()
MESS.north.training.points <- dismo::mess(var.stack, env.points.north)



# Plotting copies of the two MESS surfaces. Every positive cell is reset to 0
# so that only the negative values (which, per the dismo documentation, mark
# cells outside the reference range) use the colour ramp.
MESS.north.training.points.plot <- MESS.north.training.points
MESS.south.training.points.plot <- MESS.south.training.points
MESS.north.training.points.plot[MESS.north.training.points.plot > 0] <- 0
MESS.south.training.points.plot[MESS.south.training.points.plot > 0] <- 0

# Red-to-white colour ramp; bias = 8 spaces the colours unevenly along the ramp.
colpalette <- colorRampPalette(
  colors = c("red", "white"),
  bias = 8,
  space = "rgb",
  interpolate = "linear"
)

# Southern training fold: untitled map with a horizontal line at -30
# (the value of slice[5], the upper limit of that fold).
png(filename = "Output/test mess south.png", width = 1024, height = 1024)
plot(
  MESS.south.training.points.plot,
  main = NULL,
  col = colpalette(80),
  colNA = "grey"
)
abline(h = -30)
#points(all.sp.data[all.sp.data[,2] > slice[1] & all.sp.data[,2] < slice[5],1:2], pch = 19, cex = 0.15)
dev.off()

# Northern training fold: same layout with a horizontal line at -36
# (the value of slice[2], the lower limit of that fold).
png(filename = "Output/test mess north.png", width = 1024, height = 1024)
plot(
  MESS.north.training.points.plot,
  main = NULL,
  col = colpalette(80),
  colNA = "grey"
)
abline(h = -36)
dev.off()


# Random sample of 10000 cells from the raster stack, as a data frame with one
# column per layer.
# NOTE(review): no seed is set, so the sample differs between runs.
test <- sampleRandom(var.stack, 10000)
test <- as.data.frame(test)

# modEvA::MESS() and the range() comparison below both pair the columns of the
# two tables by POSITION. Put the sampled columns in the same order as the
# training table so that like is compared with like; warn if the names cannot
# be matched and leave the order untouched in that case.
if (all(colnames(env.points.north) %in% colnames(test))) {
  test <- test[, colnames(env.points.north), drop = FALSE]
} else {
  warning("Column names of env.points.north are not all present in the ",
          "raster sample; columns are compared by position - verify that ",
          "the order matches.", call. = FALSE)
}

# Compare the range of the first variable in the sample and in the northern fold.
range(test[, 1])
range(env.points.north[, 1])

### Looks like only the northern training set is possibly extrapolating;
### find out which variables are out of the training space.
# Reference = northern training points, projection = random sample of the stack.
northset.extrapolation.try <- modEvA::MESS(env.points.north, test)
# It is rjja.

# Density of rjja in the northern training fold (default colour) versus the
# random sample of the stack (green).
ggplot(env.points.north, aes(x = rjja)) +
  geom_density() +
  geom_density(data = test, aes(x = rjja), col = "green")
# So a few high-rainfall regions are not present in the northern dataset.
